library("here")
library("sjPlot")
library("tidyverse")
library("lme4")
library("viridis")
library("lmerTest")
library("ggplot2")
library("gridExtra")
library("gt")
library("ggthemes")
source(here("p4_analysis", "analysis_functions.R"))
p4path <- here("p4_analysis", "outputs")
p3path <- here("p3_methods", "outputs")
1. MMRR
1.1 Individual sampling
1.1.1 Summary plots
mmrr_ind <- format_mmrr(here(p3path, "mmrr_indsampling_results.csv"))
# overall error
summary_hplot(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_ind, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(mmrr_ind, "ratio_err", divergent = TRUE)

summary_hplot(mmrr_ind, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_ind, "geo_err", divergent = TRUE)

1.1.2 Model summaries
run_lmer(mmrr_ind, "ratio_ae", filepath = here(p4path, "MMRR_individual_ratioerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
−0.0046 |
0.0016 |
−2.7960 |
0.02658997 |
| envgeo - rand |
−0.0009 |
0.0016 |
−0.5319 |
0.95132229 |
| envgeo - trans |
−0.0249 |
0.0016 |
−15.0697 |
0.0 |
| grid - rand |
0.0037 |
0.0016 |
2.2641 |
0.10653142 |
| grid - trans |
−0.0202 |
0.0016 |
−12.2737 |
0.0 |
| rand - trans |
−0.0240 |
0.0016 |
−14.5378 |
0.0 |
run_lmer(mmrr_ind, "geo_ae", filepath = here(p4path, "MMRR_individual_geoerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
−0.0021 |
0.0007 |
−3.1344 |
0.009333831 |
| envgeo - rand |
−0.0010 |
0.0007 |
−1.5145 |
0.428730954 |
| envgeo - trans |
−0.0223 |
0.0007 |
−33.5784 |
0.0 |
| grid - rand |
0.0011 |
0.0007 |
1.6199 |
0.367294910 |
| grid - trans |
−0.0202 |
0.0007 |
−30.4440 |
0.0 |
| rand - trans |
−0.0213 |
0.0007 |
−32.0639 |
0.0 |
run_lmer(mmrr_ind, "env_ae", filepath = here(p4path, "MMRR_individual_enverr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
−0.0005 |
0.0004 |
−1.2011 |
0.6261246 |
| envgeo - rand |
0.0005 |
0.0004 |
1.0594 |
0.7142981 |
| envgeo - trans |
−0.0045 |
0.0004 |
−10.1643 |
3.7 × 10−14 |
| grid - rand |
0.0010 |
0.0004 |
2.2605 |
0.1074219 |
| grid - trans |
−0.0040 |
0.0004 |
−8.9632 |
2.7 × 10−14 |
| rand - trans |
−0.0050 |
0.0004 |
−11.2237 |
8.0 × 10−15 |
1.1.3 Megaplots
MEGAPLOT(mmrr_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_ind, "ratio_err", colpal = "viridis", divergent = TRUE)

MEGAPLOT(mmrr_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_ind, "geo_err", divergent = TRUE)

1.2 Site sampling
1.2.1 Summary plots
mmrr_site <- format_mmrr(here(p3path, "mmrr_sitesampling_results.csv"))
# overall error
summary_hplot(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_site, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(mmrr_site, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(mmrr_site, "ratio_err", divergent = TRUE)

summary_hplot(mmrr_site, "comboenv_err", divergent = TRUE)

summary_hplot(mmrr_site, "geo_err", divergent = TRUE)

1.2.2 Model summaries
run_lmer(mmrr_site, "ratio_ae", filepath = here(p4path, "MMRR_site_ratioerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
−0.0340 |
0.0031 |
−10.8514 |
2.7 × 10−14 |
| envgeo - rand |
−0.0101 |
0.0031 |
−3.2163 |
3.7 × 10−3 |
| equi - rand |
0.0239 |
0.0031 |
7.6351 |
8.6 × 10−14 |
run_lmer(mmrr_site, "geo_ae", filepath = here(p4path, "MMRR_site_geoerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
0.0098 |
0.0015 |
6.5604 |
1.6 × 10−10 |
| envgeo - rand |
0.0018 |
0.0015 |
1.2065 |
0.4492773 |
| equi - rand |
−0.0080 |
0.0015 |
−5.3539 |
2.6 × 10−7 |
run_lmer(mmrr_site, "env_ae", filepath = here(p4path, "MMRR_site_enverr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
−0.0116 |
0.0011 |
−10.4807 |
2.7 × 10−14 |
| envgeo - rand |
−0.0035 |
0.0011 |
−3.1708 |
4.3 × 10−3 |
| equi - rand |
0.0081 |
0.0011 |
7.3099 |
8.2 × 10−13 |
1.2.3 Megaplots
MEGAPLOT(mmrr_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(mmrr_site, "ratio_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(mmrr_site, "geo_err", divergent = TRUE)

2. GDM
2.1 Individual sampling
2.1.1 Summary plots
gdm_ind <- format_gdm(here(p3path, "gdm_indsampling_results.csv"))
# overall error
summary_hplot(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_ind, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(gdm_ind, "ratio_err", divergent = TRUE)

summary_hplot(gdm_ind, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_ind, "geo_err", divergent = TRUE)

2.1.2 Model summaries
run_lmer(gdm_ind, "ratio_ae", filepath = here(p4path, "GDM_individual_ratioerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
−0.0135 |
0.0024 |
−5.7327 |
5.9 × 10−8 |
| envgeo - rand |
−0.0097 |
0.0024 |
−4.0895 |
2.5 × 10−4 |
| envgeo - trans |
−0.0198 |
0.0024 |
−8.3745 |
4.1 × 10−14 |
| grid - rand |
0.0039 |
0.0024 |
1.6429 |
0.35443857 |
| grid - trans |
−0.0062 |
0.0024 |
−2.6417 |
0.04111055 |
| rand - trans |
−0.0101 |
0.0024 |
−4.2845 |
1.1 × 10−4 |
run_lmer(gdm_ind, "geo_ae", filepath = here(p4path, "GDM_individual_geoerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
0.0899 |
0.0045 |
19.8550 |
0.0 |
| envgeo - rand |
0.0204 |
0.0045 |
4.5115 |
3.8 × 10−5 |
| envgeo - trans |
0.0493 |
0.0045 |
10.8910 |
4.0 × 10−14 |
| grid - rand |
−0.0695 |
0.0045 |
−15.3432 |
0.0 |
| grid - trans |
−0.0406 |
0.0045 |
−8.9659 |
2.7 × 10−14 |
| rand - trans |
0.0289 |
0.0045 |
6.3789 |
1.1 × 10−9 |
run_lmer(gdm_ind, "env_ae", filepath = here(p4path, "GDM_individual_enverr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - grid |
−0.0026 |
0.0013 |
−2.0550 |
0.1681013 |
| envgeo - rand |
−0.0046 |
0.0013 |
−3.6538 |
1.5 × 10−3 |
| envgeo - trans |
−0.0106 |
0.0013 |
−8.3556 |
4.3 × 10−14 |
| grid - rand |
−0.0020 |
0.0013 |
−1.5990 |
0.3791273 |
| grid - trans |
−0.0080 |
0.0013 |
−6.3009 |
1.8 × 10−9 |
| rand - trans |
−0.0060 |
0.0013 |
−4.7013 |
1.5 × 10−5 |
2.1.3 Megaplots
MEGAPLOT(gdm_ind, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_ind, "ratio_err", divergent = TRUE)

MEGAPLOT(gdm_ind, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_ind, "geo_err", divergent = TRUE)

2.1.4 Prop NA
Confirm that the distribution of NAs is as expected and the
proportions are small
MEGAPLOT(gdm_ind, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

2.2 Site sampling
2.2.1 Summary plots
gdm_site <- format_gdm(here(p3path, "gdm_sitesampling_results.csv"))
# overall error
summary_hplot(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_site, "env_ae", colpal = "viridis", direction = -1)

summary_hplot(gdm_site, "geo_ae", colpal = "viridis", direction = -1)

# bias
summary_hplot(gdm_site, "ratio_err", divergent = TRUE)

summary_hplot(gdm_site, "comboenv_err", divergent = TRUE)

summary_hplot(gdm_site, "geo_err", divergent = TRUE)

2.2.2 Model summaries
run_lmer(gdm_site, "ratio_ae", filepath = here(p4path, "GDM_site_ratioerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
−0.0760 |
0.0044 |
−17.3557 |
0.0 |
| envgeo - rand |
−0.0272 |
0.0044 |
−6.1624 |
2.1 × 10−9 |
| equi - rand |
0.0488 |
0.0044 |
11.1986 |
7.3 × 10−15 |
run_lmer(gdm_site, "geo_ae", filepath = here(p4path, "GDM_site_geoerr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
0.2882 |
0.0134 |
21.4290 |
0.0 |
| envgeo - rand |
0.0229 |
0.0136 |
1.6897 |
0.2091145 |
| equi - rand |
−0.2652 |
0.0134 |
−19.8333 |
0.0 |
run_lmer(gdm_site, "env_ae", filepath = here(p4path, "GDM_site_enverr.csv"))
| Contrast |
Estimate |
SE |
Z ratio |
p |
| envgeo - equi |
−0.0307 |
0.0034 |
−8.9621 |
2.1 × 10−14 |
| envgeo - rand |
−0.0267 |
0.0035 |
−7.7294 |
5.0 × 10−14 |
| equi - rand |
0.0040 |
0.0034 |
1.1684 |
0.4721507 |
2.2.3 Megaplots
MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_ae", colpal = "viridis", direction = -1)

MEGAPLOT(gdm_site, "ratio_err", divergent = TRUE)

MEGAPLOT(gdm_site, "comboenv_err", divergent = TRUE)

MEGAPLOT(gdm_site, "geo_err", divergent = TRUE)

2.2.4 Prop NA
Confirm that the distribution of NAs is as expected and the
proportions are small
MEGAPLOT(gdm_site, "geo_coeff", aggfunc = "prop_na", colpal = "mako")

3. Comparison of error distirbutions for MMRR and GDM
mmrr_ind %>%
filter(sampstrat != "full") %>%
ggplot(aes(x = ratio_ae, fill = m, colour = m)) +
geom_density(alpha = 0.5) +
theme_few() +
scale_fill_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako") +
scale_color_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako")

gdm_ind %>%
filter(sampstrat != "full") %>%
ggplot(aes(x = ratio_ae, fill = m, colour = m)) +
geom_density(alpha = 0.5) +
theme_few() +
scale_fill_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako") +
scale_color_viridis_d(direction = -1, end = 0.7, begin = 0.3, option = "mako")
## Warning: Removed 5 rows containing non-finite values (stat_density).

4. Compare results of MMRR and GDM
plot(mmrr_ind$geo_coeff, gdm_ind$geo_coeff, col = gdm_ind$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_ind$comboenv_coeff, gdm_ind$comboenv_coeff)

df <- data.frame(mmrr_ind,
geo_mg = gdm_ind$geo_coeff/mmrr_ind$geo_coeff,
env_mg = gdm_ind$comboenv_coeff/mmrr_ind$comboenv_coeff,
ratio_mg = gdm_ind$ratio/mmrr_ind$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")

MEGAPLOT(df, "geo_mg")

MEGAPLOT(df, "env_mg")

MEGAPLOT(df, "ratio_mg")

plot(mmrr_site$geo_coeff, gdm_site$geo_coeff, col = gdm_site$m)
legend("topleft", c("0.25", "1"), col = c("black", "red"), pch = 1)

plot(mmrr_site$comboenv_coeff, gdm_site$comboenv_coeff)

df <- data.frame(mmrr_site,
geo_mg = gdm_site$geo_coeff/mmrr_site$geo_coeff,
env_mg = gdm_site$comboenv_coeff/mmrr_site$comboenv_coeff,
ratio_mg = gdm_site$ratio/mmrr_site$ratio )
summary_hplot(df, "geo_mg")

summary_hplot(df, "env_mg")

summary_hplot(df, "ratio_mg")
